/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    RenameAttribute.java
 *    Copyright (C) 2009-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.filters.unsupervised.attribute;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Range;
import weka.core.Utils;
import weka.core.WeightedAttributesHandler;
import weka.core.WeightedInstancesHandler;
import weka.filters.SimpleStreamFilter;

/**
 * <!-- globalinfo-start --> This filter is used for renaming attributes.<br/>
 * Regular expressions can be used in the matching and replacing.<br/>
 * See Javadoc of java.util.regex.Pattern class for more information:<br/>
 * http://java.sun.com/javase/6/docs/api/java/util/regex/Pattern.html
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -find &lt;regexp&gt;
 *  The regular expression that the attribute names must match.
 *  (default: ([\s\S]+))
 * </pre>
 * 
 * <pre>
 * -replace &lt;string&gt;
 *  The string to replace the regular expression of matching attributes with.
 *  Cannot be used in conjunction with '-remove'.
 *  (default: $0)
 * </pre>
 * 
 * <pre>
 * -remove
 *  In case the matching string needs to be removed instead of replaced.
 *  Cannot be used in conjunction with '-replace &lt;string&gt;'.
 *  (default: off)
 * </pre>
 * 
 * <pre>
 * -all
 *  Replaces all occurrences instead of just the first.
 *  (default: only first occurrence)
 * </pre>
 * 
 * <pre>
 * -R &lt;range&gt;
 *  The attribute range to work on.
 * This is a comma separated list of attribute indices, with "first" and "last" valid values.
 *  Specify an inclusive range with "-".
 *  E.g: "first-3,5,6-10,last".
 *  (default: first-last)
 * </pre>
 * 
 * <pre>
 * -V
 *  Inverts the attribute selection range.
 *  (default: off)
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision$
 */
public class RenameAttribute extends SimpleStreamFilter implements WeightedInstancesHandler, WeightedAttributesHandler {

    /** for serialization. */
    private static final long serialVersionUID = 4216491776378279596L;

    /** the regular expression that the attribute names have to match. */
    protected String m_Find = "([\\s\\S]+)";

    /** the regular expression to replace the attribute name with. */
    protected String m_Replace = "$0";

    /** the attribute range to work on. */
    protected Range m_AttributeIndices = new Range("first-last");

    /** whether to replace all occurrences or just the first. */
    protected boolean m_ReplaceAll = false;

    /**
     * Returns a string describing this filter.
     * 
     * @return a description of the filter suitable for displaying in the
     *         explorer/experimenter gui
     */
    @Override
    public String globalInfo() {
        return "This filter is used for renaming attributes.\n\n" + "Regular expressions can be used in the matching and replacing.\n\n" + "See Javadoc of java.util.regex.Pattern class for more information:\n" + "http://java.sun.com/javase/6/docs/api/java/util/regex/Pattern.html";
    }

    /**
     * Returns an enumeration describing the available options.
     * 
     * @return an enumeration of all the available options.
     */
    @Override
    public Enumeration<Option> listOptions() {

        Vector<Option> result = new Vector<Option>(6);

        result.addElement(new Option("\tThe regular expression that the attribute names must match.\n" + "\t(default: ([\\s\\S]+))", "find", 1, "-find <regexp>"));

        result.addElement(new Option("\tThe string to replace the regular expression of matching attributes with.\n" + "\tCannot be used in conjunction with '-remove'.\n" + "\t(default: $0)", "replace", 1, "-replace <string>"));

        result.addElement(new Option("\tIn case the matching string needs to be removed instead of replaced.\n" + "\tCannot be used in conjunction with '-replace <string>'.\n" + "\t(default: off)", "remove", 0, "-remove"));

        result.addElement(new Option("\tReplaces all occurrences instead of just the first.\n" + "\t(default: only first occurrence)", "all", 0, "-all"));

        result.addElement(new Option("\tThe attribute range to work on.\n" + "This is a comma separated list of attribute indices, with " + "\"first\" and \"last\" valid values.\n" + "\tSpecify an inclusive range with \"-\".\n" + "\tE.g: \"first-3,5,6-10,last\".\n" + "\t(default: first-last)", "R", 1, "-R <range>"));

        result.addElement(new Option("\tInverts the attribute selection range.\n" + "\t(default: off)", "V", 0, "-V"));

        result.addAll(Collections.list(super.listOptions()));

        return result.elements();
    }

    /**
     * Parses a given list of options.
     * <p/>
     * 
     * <!-- options-start --> Valid options are:
     * <p/>
     * 
     * <pre>
     * -find &lt;regexp&gt;
     *  The regular expression that the attribute names must match.
     *  (default: ([\s\S]+))
     * </pre>
     * 
     * <pre>
     * -replace &lt;string&gt;
     *  The string to replace the regular expression of matching attributes with.
     *  Cannot be used in conjunction with '-remove'.
     *  (default: $0)
     * </pre>
     * 
     * <pre>
     * -remove
     *  In case the matching string needs to be removed instead of replaced.
     *  Cannot be used in conjunction with '-replace &lt;string&gt;'.
     *  (default: off)
     * </pre>
     * 
     * <pre>
     * -all
     *  Replaces all occurrences instead of just the first.
     *  (default: only first occurrence)
     * </pre>
     * 
     * <pre>
     * -R &lt;range&gt;
     *  The attribute range to work on.
     * This is a comma separated list of attribute indices, with "first" and "last" valid values.
     *  Specify an inclusive range with "-".
     *  E.g: "first-3,5,6-10,last".
     *  (default: first-last)
     * </pre>
     * 
     * <pre>
     * -V
     *  Inverts the attribute selection range.
     *  (default: off)
     * </pre>
     * 
     * <!-- options-end -->
     * 
     * @param options the list of options as an array of strings
     * @throws Exception if an option is not supported
     */
    @Override
    public void setOptions(String[] options) throws Exception {

        String tmpStr = Utils.getOption("find", options);
        if (tmpStr.length() != 0) {
            setFind(tmpStr);
        } else {
            setFind("([\\s\\S]+)");
        }

        if (Utils.getFlag("remove", options)) {
            setReplace("");
        } else {
            tmpStr = Utils.getOption("replace", options);
            if (tmpStr.length() > 0) {
                setReplace(tmpStr);
            } else {
                setReplace("$0");
            }
        }

        setReplaceAll(Utils.getFlag("all", options));

        tmpStr = Utils.getOption("R", options);
        if (tmpStr.length() != 0) {
            setAttributeIndices(tmpStr);
        } else {
            setAttributeIndices("first-last");
        }

        setInvertSelection(Utils.getFlag("V", options));

        if (getInputFormat() != null) {
            setInputFormat(getInputFormat());
        }

        super.setOptions(options);

        Utils.checkForRemainingOptions(options);
    }

    /**
     * Gets the current settings of the filter.
     * 
     * @return an array of strings suitable for passing to setOptions
     */
    @Override
    public String[] getOptions() {

        Vector<String> result = new Vector<String>();

        result.add("-find");
        result.add(getFind());

        if (getReplace().length() > 0) {
            result.add("-replace");
            result.add(getReplace());
        } else {
            result.add("-remove");
        }

        if (getReplaceAll()) {
            result.add("-all");
        }

        result.add("-R");
        result.add(getAttributeIndices());

        if (getInvertSelection()) {
            result.add("-V");
        }

        Collections.addAll(result, super.getOptions());

        return result.toArray(new String[result.size()]);
    }

    /**
     * Sets the regular expression that the attribute names must match.
     * 
     * @param value the regular expression
     */
    public void setFind(String value) {
        m_Find = value;
    }

    /**
     * Returns the current regular expression for .
     * 
     * @return a string containing a comma separated list of ranges
     */
    public String getFind() {
        return m_Find;
    }

    /**
     * Returns the tip text for this property.
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String findTipText() {
        return "The regular expression that the attribute names must match.";
    }

    /**
     * Sets the regular expression to replace matching attribute names with.
     * 
     * @param value the regular expression
     */
    public void setReplace(String value) {
        m_Replace = value;
    }

    /**
     * Returns the regular expression to replace matching attribute names with.
     * 
     * @return the regular expression
     */
    public String getReplace() {
        return m_Replace;
    }

    /**
     * Returns the tip text for this property.
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String replaceTipText() {
        return "The regular expression to use for replacing the matching attribute " + "names with.";
    }

    /**
     * Sets whether to replace all occurrences or just the first one.
     * 
     * @param value if true then all occurrences are replace
     */
    public void setReplaceAll(boolean value) {
        m_ReplaceAll = value;
    }

    /**
     * Returns whether all occurrences are replaced or just the first one.
     * 
     * @return true if all occurrences are replaced
     */
    public boolean getReplaceAll() {
        return m_ReplaceAll;
    }

    /**
     * Returns the tip text for this property.
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String replaceAllTipText() {
        return "If set to true, then all occurrences of the match will be replaced; " + "otherwise only the first.";
    }

    /**
     * Sets which attributes are to be acted on.
     * 
     * @param value a string representing the list of attributes. Since the string
     *              will typically come from a user, attributes are indexed from1.
     *              <br/>
     *              eg: first-3,5,6-last
     */
    public void setAttributeIndices(String value) {
        m_AttributeIndices.setRanges(value);
    }

    /**
     * Gets the current range selection.
     * 
     * @return a string containing a comma separated list of ranges
     */
    public String getAttributeIndices() {
        return m_AttributeIndices.getRanges();
    }

    /**
     * Returns the tip text for this property.
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String attributeIndicesTipText() {
        return "Specify range of attributes to act on; " + "this is a comma separated list of attribute indices, with " + "\"first\" and \"last\" valid values; specify an inclusive " + "range with \"-\"; eg: \"first-3,5,6-10,last\".";
    }

    /**
     * Sets whether to invert the selection of the attributes.
     * 
     * @param value if true then the selection is inverted
     */
    public void setInvertSelection(boolean value) {
        m_AttributeIndices.setInvert(value);
    }

    /**
     * Gets whether to invert the selection of the attributes.
     * 
     * @return true if the selection is inverted
     */
    public boolean getInvertSelection() {
        return m_AttributeIndices.getInvert();
    }

    /**
     * Returns the tip text for this property.
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String invertSelectionTipText() {
        return "If set to true, the selection will be inverted; eg: the attribute " + "indices '2-4' then mean everything apart from '2-4'.";
    }

    /**
     * Returns the Capabilities of this filter.
     * 
     * @return the capabilities of this object
     * @see Capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();

        // attributes
        result.enableAllAttributes();
        result.enable(Capability.MISSING_VALUES);

        // class
        result.enableAllClasses();
        result.enable(Capability.MISSING_CLASS_VALUES);
        result.enable(Capability.NO_CLASS);

        return result;
    }

    /**
     * Determines the output format based on the input format and returns this. In
     * case the output format cannot be returned immediately, i.e.,
     * hasImmediateOutputFormat() returns false, then this method will called from
     * batchFinished() after the call of preprocess(Instances), in which, e.g.,
     * statistics for the actual processing step can be gathered.
     * 
     * @param inputFormat the input format to base the output format on
     * @return the output format
     * @throws Exception in case the determination goes wrong
     */
    @Override
    protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
        Instances result;
        Attribute att;
        ArrayList<Attribute> atts;
        int i;

        m_AttributeIndices.setUpper(inputFormat.numAttributes() - 1);

        // generate new header
        atts = new ArrayList<Attribute>();
        for (i = 0; i < inputFormat.numAttributes(); i++) {
            att = inputFormat.attribute(i);
            if (m_AttributeIndices.isInRange(i)) {
                if (m_ReplaceAll) {
                    atts.add(att.copy(att.name().replaceAll(m_Find, m_Replace)));
                } else {
                    atts.add(att.copy(att.name().replaceFirst(m_Find, m_Replace)));
                }
            } else {
                atts.add((Attribute) att.copy());
            }
        }
        result = new Instances(inputFormat.relationName(), atts, 0);
        result.setClassIndex(inputFormat.classIndex());

        return result;
    }

    /**
     * processes the given instance (may change the provided instance) and returns
     * the modified version.
     * 
     * @param instance the instance to process
     * @return the modified data
     * @throws Exception in case the processing goes wrong
     */
    @Override
    protected Instance process(Instance instance) throws Exception {
        return (Instance) instance.copy();
    }

    /**
     * Main method for executing this filter.
     * 
     * @param args the arguments to the filter: use -h for help
     */
    public static void main(String[] args) {
        runFilter(new RenameAttribute(), args);
    }
}
